{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "563b3e80",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from snownlp import SnowNLP\n",
    "import matplotlib.pyplot as plt\n",
    "plt.rcParams['font.sans-serif']=['SimHei']\n",
    "plt.rcParams['axes.unicode_minus'] = False \n",
    "import seaborn as sns\n",
    "sns.set_style('whitegrid',{'font.sans-serif':['simhei','Arial']})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e4a8ee16",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "原始数据 1\n",
      "                          标题      日期\n",
      "0      十大博客看后市：2600点下方将现低吸机会  12月07日\n",
      "1              杨德龙：跨年度行情能否出现  12月07日\n",
      "2         午后名博看市：2600点得失至关重要  12月07日\n",
      "3  李大霄：2449点成为A股历史第五个底部正变为事实  12月07日\n",
      "4  杨德龙：2019年可能会出现“美股向下、A股向上”  12月07日\n",
      "\n"
     ]
    }
   ],
   "source": [
    "orig_comments=pd.read_csv(r'stocktextming.csv')\n",
    "orig_comments.drop(\"日期时间\",axis=1,inplace=True)\n",
    "print('原始数据',1)\n",
    "print(orig_comments.head())\n",
    "print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "aaa39692",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "情绪得分\n",
      "                          标题      日期        情绪\n",
      "0      十大博客看后市：2600点下方将现低吸机会  12月07日  0.108485\n",
      "1              杨德龙：跨年度行情能否出现  12月07日  0.901201\n",
      "2         午后名博看市：2600点得失至关重要  12月07日  0.523612\n",
      "3  李大霄：2449点成为A股历史第五个底部正变为事实  12月07日  0.308116\n",
      "4  杨德龙：2019年可能会出现“美股向下、A股向上”  12月07日  0.667355\n",
      "\n"
     ]
    }
   ],
   "source": [
    "orig_comments['情绪']=None\n",
    "lenOrig=len(orig_comments)\n",
    "i=0\n",
    "while(i<lenOrig):\n",
    "    s=SnowNLP(orig_comments.iloc[i,0]).sentiments\n",
    "    orig_comments.iloc[i,2]=s\n",
    "    i=i+1\n",
    "print('情绪得分')\n",
    "print(orig_comments.head())\n",
    "print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "a5f7648b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "每日评论平均情绪得分：\n",
      "              情绪  计数      情绪平均\n",
      "日期                            \n",
      "09月03日  0.397172   2  0.992931\n",
      "09月04日  7.164106  11  3.256412\n",
      "09月05日  5.696181   8  3.560113\n",
      "09月06日  4.476095   7  3.197211\n",
      "09月07日  4.904907   8  3.065567\n",
      "\n"
     ]
    }
   ],
   "source": [
    "numberByDay=orig_comments['情绪'].groupby(orig_comments['日期']).count()\n",
    "emotionByDay=orig_comments['情绪'].groupby(orig_comments['日期']).sum()\n",
    "markByDay=pd.DataFrame()\n",
    "markByDay['情绪']=emotionByDay\n",
    "markByDay['计数']=numberByDay\n",
    "markByDay['情绪平均']=markByDay['情绪']/markByDay['计数']*5\n",
    "print('每日评论平均情绪得分：')\n",
    "print(markByDay.head())\n",
    "print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "7f17be8a",
   "metadata": {},
   "outputs": [],
   "source": [
    "markByDay['order']=markByDay.index\n",
    "markByDay['日期']=None\n",
    "\n",
    "lenMBD=len(markByDay)\n",
    "i=0\n",
    "while(i<lenMBD):\n",
    "    str1=markByDay.iloc[i,3]\n",
    "    str2='2018-'+str1[0:2] +'-'+str1[3:5]\n",
    "    markByDay.iloc[i,4]=str2\n",
    "    i=i+1\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "c8601888",
   "metadata": {},
   "outputs": [
    {
     "ename": "FileNotFoundError",
     "evalue": "[Errno 2] No such file or directory: 'C:\\\\Users\\\\administered\\\\Desktop\\\\sh.csv'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[8], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m shMarket \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mread_csv(\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mC:\u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124mUsers\u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124madministered\u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124mDesktop\u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124msh.csv\u001b[39m\u001b[38;5;124m'\u001b[39m,encoding \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mutf-8\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m      2\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m上证指数：\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m      3\u001b[0m \u001b[38;5;28mprint\u001b[39m(shMarket\u001b[38;5;241m.\u001b[39mhead())\n",
      "File \u001b[1;32mE:\\software\\learn\\Ana\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:912\u001b[0m, in \u001b[0;36mread_csv\u001b[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[0;32m    899\u001b[0m kwds_defaults \u001b[38;5;241m=\u001b[39m _refine_defaults_read(\n\u001b[0;32m    900\u001b[0m     dialect,\n\u001b[0;32m    901\u001b[0m     delimiter,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    908\u001b[0m     dtype_backend\u001b[38;5;241m=\u001b[39mdtype_backend,\n\u001b[0;32m    909\u001b[0m )\n\u001b[0;32m    910\u001b[0m kwds\u001b[38;5;241m.\u001b[39mupdate(kwds_defaults)\n\u001b[1;32m--> 912\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _read(filepath_or_buffer, kwds)\n",
      "File \u001b[1;32mE:\\software\\learn\\Ana\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:577\u001b[0m, in \u001b[0;36m_read\u001b[1;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[0;32m    574\u001b[0m _validate_names(kwds\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnames\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[0;32m    576\u001b[0m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[1;32m--> 577\u001b[0m parser \u001b[38;5;241m=\u001b[39m TextFileReader(filepath_or_buffer, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwds)\n\u001b[0;32m    579\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[0;32m    580\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m parser\n",
      "File \u001b[1;32mE:\\software\\learn\\Ana\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:1407\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[1;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[0;32m   1404\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m   1406\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles: IOHandles \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m-> 1407\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_engine(f, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mengine)\n",
      "File \u001b[1;32mE:\\software\\learn\\Ana\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:1661\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[1;34m(self, f, engine)\u001b[0m\n\u001b[0;32m   1659\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[0;32m   1660\u001b[0m         mode \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m-> 1661\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;241m=\u001b[39m get_handle(\n\u001b[0;32m   1662\u001b[0m     f,\n\u001b[0;32m   1663\u001b[0m     mode,\n\u001b[0;32m   1664\u001b[0m     encoding\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mencoding\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m),\n\u001b[0;32m   1665\u001b[0m     compression\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcompression\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m),\n\u001b[0;32m   1666\u001b[0m     memory_map\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmemory_map\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mFalse\u001b[39;00m),\n\u001b[0;32m   1667\u001b[0m     is_text\u001b[38;5;241m=\u001b[39mis_text,\n\u001b[0;32m   1668\u001b[0m     errors\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mencoding_errors\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstrict\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[0;32m   1669\u001b[0m     storage_options\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstorage_options\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m),\n\u001b[0;32m   1670\u001b[0m )\n\u001b[0;32m   1671\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m   1672\u001b[0m f \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles\u001b[38;5;241m.\u001b[39mhandle\n",
      "File \u001b[1;32mE:\\software\\learn\\Ana\\Lib\\site-packages\\pandas\\io\\common.py:868\u001b[0m, in \u001b[0;36mget_handle\u001b[1;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[0;32m    859\u001b[0m         handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(\n\u001b[0;32m    860\u001b[0m             handle,\n\u001b[0;32m    861\u001b[0m             ioargs\u001b[38;5;241m.\u001b[39mmode,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    864\u001b[0m             newline\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m    865\u001b[0m         )\n\u001b[0;32m    866\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m    867\u001b[0m         \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[1;32m--> 868\u001b[0m         handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(handle, ioargs\u001b[38;5;241m.\u001b[39mmode)\n\u001b[0;32m    869\u001b[0m     handles\u001b[38;5;241m.\u001b[39mappend(handle)\n\u001b[0;32m    871\u001b[0m \u001b[38;5;66;03m# Convert BytesIO or file objects passed with an encoding\u001b[39;00m\n",
      "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'C:\\\\Users\\\\administered\\\\Desktop\\\\sh.csv'"
     ]
    }
   ],
   "source": [
    "shMarket = pd.read_csv(r'C:\\Users\\administered\\Desktop\\sh.csv',encoding = 'utf-8')\n",
    "print('上证指数：')\n",
    "print(shMarket.head())\n",
    "szMarket = pd.read_csv(r'C:\\Users\\administered\\Desktop\\sz.csv',encoding = 'utf-8')\n",
    "print('深证指数：')\n",
    "print(szMarket.head())\n",
    "print()\n",
    "Market=pd.DataFrame()\n",
    "Market['日期']=shMarket['date']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0c9f948b",
   "metadata": {},
   "outputs": [],
   "source": [
    "Market['上证波动']=shMarket['p_change']\n",
    "Market['深证波动']=szMarket['p_change']\n",
    "print('New')\n",
    "print(Market.head())\n",
    "\n",
    "markByDay.set_index('日期',inplace=True)\n",
    "Market.set_index('日期',inplace=True)\n",
    "result = Market.join(markByDay)\n",
    "\n",
    "print('股市波动与评论情绪变化对比:')\n",
    "plt.figure(figsize=(10,8))\n",
    "\n",
    "plt.plot(result['上证波动'],'r-',label='上证波动',linewidth=1)\n",
    "plt.plot(result['深证波动'],'k:',label='深证波动',linewidth=3)\n",
    "plt.plot(result['情绪平均'],'b-.',label='情绪波动',linewidth=3)\n",
    "plt.title('三种波动对比',fontsize=20)\n",
    "plt.xlabel('交易日期',fontsize=15)\n",
    "plt.ylabel('波动率',fontsize=15)\n",
    "plt.legend()\n",
    "plt.savefig('stockTextMing.png',dpi=300,bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4d1d4253",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
